Assignment #5

Load the packages to be used and load the gapminder data set.

suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(forcats))
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(gapminder))

Part 1: Factor management

Factors are how categorical data are stored. The values a factor can take on are called levels. It is important to check variable types. What you think are characters may actually be stored numerically. Let’s explore the data frame.

summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 

Before beginning the factor management exercise, let’s get to know the factors in the gapminder data set.

class(gapminder$country)
## [1] "factor"
class(gapminder$continent)
## [1] "factor"
class(gapminder$year)
## [1] "integer"
class(gapminder$lifeExp)
## [1] "numeric"
class(gapminder$pop)
## [1] "integer"
class(gapminder$gdpPercap)
## [1] "numeric"

We see that we have two factors in this data set. Let’s see the levels for “country” and “continent” before we begin filtering the data.

nlevels(gapminder$country)
## [1] 142
nlevels(gapminder$continent)
## [1] 5
levels(gapminder$country)
##   [1] "Afghanistan"              "Albania"                 
##   [3] "Algeria"                  "Angola"                  
##   [5] "Argentina"                "Australia"               
##   [7] "Austria"                  "Bahrain"                 
##   [9] "Bangladesh"               "Belgium"                 
##  [11] "Benin"                    "Bolivia"                 
##  [13] "Bosnia and Herzegovina"   "Botswana"                
##  [15] "Brazil"                   "Bulgaria"                
##  [17] "Burkina Faso"             "Burundi"                 
##  [19] "Cambodia"                 "Cameroon"                
##  [21] "Canada"                   "Central African Republic"
##  [23] "Chad"                     "Chile"                   
##  [25] "China"                    "Colombia"                
##  [27] "Comoros"                  "Congo, Dem. Rep."        
##  [29] "Congo, Rep."              "Costa Rica"              
##  [31] "Cote d'Ivoire"            "Croatia"                 
##  [33] "Cuba"                     "Czech Republic"          
##  [35] "Denmark"                  "Djibouti"                
##  [37] "Dominican Republic"       "Ecuador"                 
##  [39] "Egypt"                    "El Salvador"             
##  [41] "Equatorial Guinea"        "Eritrea"                 
##  [43] "Ethiopia"                 "Finland"                 
##  [45] "France"                   "Gabon"                   
##  [47] "Gambia"                   "Germany"                 
##  [49] "Ghana"                    "Greece"                  
##  [51] "Guatemala"                "Guinea"                  
##  [53] "Guinea-Bissau"            "Haiti"                   
##  [55] "Honduras"                 "Hong Kong, China"        
##  [57] "Hungary"                  "Iceland"                 
##  [59] "India"                    "Indonesia"               
##  [61] "Iran"                     "Iraq"                    
##  [63] "Ireland"                  "Israel"                  
##  [65] "Italy"                    "Jamaica"                 
##  [67] "Japan"                    "Jordan"                  
##  [69] "Kenya"                    "Korea, Dem. Rep."        
##  [71] "Korea, Rep."              "Kuwait"                  
##  [73] "Lebanon"                  "Lesotho"                 
##  [75] "Liberia"                  "Libya"                   
##  [77] "Madagascar"               "Malawi"                  
##  [79] "Malaysia"                 "Mali"                    
##  [81] "Mauritania"               "Mauritius"               
##  [83] "Mexico"                   "Mongolia"                
##  [85] "Montenegro"               "Morocco"                 
##  [87] "Mozambique"               "Myanmar"                 
##  [89] "Namibia"                  "Nepal"                   
##  [91] "Netherlands"              "New Zealand"             
##  [93] "Nicaragua"                "Niger"                   
##  [95] "Nigeria"                  "Norway"                  
##  [97] "Oman"                     "Pakistan"                
##  [99] "Panama"                   "Paraguay"                
## [101] "Peru"                     "Philippines"             
## [103] "Poland"                   "Portugal"                
## [105] "Puerto Rico"              "Reunion"                 
## [107] "Romania"                  "Rwanda"                  
## [109] "Sao Tome and Principe"    "Saudi Arabia"            
## [111] "Senegal"                  "Serbia"                  
## [113] "Sierra Leone"             "Singapore"               
## [115] "Slovak Republic"          "Slovenia"                
## [117] "Somalia"                  "South Africa"            
## [119] "Spain"                    "Sri Lanka"               
## [121] "Sudan"                    "Swaziland"               
## [123] "Sweden"                   "Switzerland"             
## [125] "Syria"                    "Taiwan"                  
## [127] "Tanzania"                 "Thailand"                
## [129] "Togo"                     "Trinidad and Tobago"     
## [131] "Tunisia"                  "Turkey"                  
## [133] "Uganda"                   "United Kingdom"          
## [135] "United States"            "Uruguay"                 
## [137] "Venezuela"                "Vietnam"                 
## [139] "West Bank and Gaza"       "Yemen, Rep."             
## [141] "Zambia"                   "Zimbabwe"
levels(gapminder$continent)
## [1] "Africa"   "Americas" "Asia"     "Europe"   "Oceania"

Now let’s work towards dropping “Oceania”.

We will start by filtering the gapminder data to remove observations associated with the continent of Oceania.

# Keep every continent except Oceania. Excluding the one unwanted level is
# shorter and less error-prone than enumerating the four that remain.
No_Oceania <- gapminder %>%
  filter(continent != "Oceania")

# Preview a random 10% of the rows instead of printing the whole table.
# kable() has no `justify` argument; column alignment is controlled by
# `align`, given here as one "c" (centre) entry per column.
No_Oceania %>%
  sample_frac(0.1) %>%
  knitr::kable(format = "markdown", align = rep("c", ncol(No_Oceania)))
country continent year lifeExp pop gdpPercap
Czech Republic Europe 2002 75.510 10256295 17596.2102
Korea, Dem. Rep. Asia 1982 69.100 17647518 4106.5253
Paraguay Americas 2002 70.755 5884491 3783.6742
Angola Africa 1977 39.483 6162675 3008.6474
Uganda Africa 1962 45.344 7688797 767.2717
Djibouti Africa 1967 42.074 127617 3020.0505
Bangladesh Asia 1992 56.018 113704579 837.8102
Switzerland Europe 1982 76.210 6468126 28397.7151
Belgium Europe 1952 68.000 8730405 8343.1051
Brazil Americas 1967 57.632 88049823 3429.8644
Eritrea Africa 2002 55.240 4414865 765.3500
Albania Europe 1977 68.930 2509048 3533.0039
India Asia 1957 40.249 409000000 590.0620
Angola Africa 1992 40.647 8735988 2627.8457
Montenegro Europe 1992 75.435 621621 7003.3390
Congo, Dem. Rep. Africa 1972 45.989 23007669 904.8961
Denmark Europe 1962 72.350 4646899 13583.3135
Central African Republic Africa 1962 39.475 1523478 1193.0688
Congo, Dem. Rep. Africa 1992 45.548 41672143 457.7192
Netherlands Europe 2007 79.762 16570613 36797.9333
Malawi Africa 1962 38.410 3628608 427.9011
United Kingdom Europe 1967 71.360 54959000 14142.8509
Eritrea Africa 1977 44.535 2512642 505.7538
Guatemala Americas 1997 66.322 9803875 4684.3138
Mauritius Africa 1962 60.246 701016 2529.0675
Trinidad and Tobago Americas 1952 59.100 662850 3023.2719
Bosnia and Herzegovina Europe 1967 64.790 3585000 2172.3524
Austria Europe 1987 74.940 7578903 23687.8261
Botswana Africa 1952 47.622 442308 851.2411
Colombia Americas 2002 71.682 41008227 5755.2600
Austria Europe 1977 72.170 7568430 19749.4223
Benin Africa 2007 56.728 8078314 1441.2849
Austria Europe 1952 66.800 6927772 6137.0765
Serbia Europe 1972 68.700 8313288 10522.0675
Nicaragua Americas 1972 55.151 2182908 4688.5933
Mongolia Asia 1962 48.251 1010280 1056.3540
Namibia Africa 1997 58.909 1774766 3899.5243
Norway Europe 1972 74.340 3933004 18965.0555
Costa Rica Americas 1992 75.713 3173216 6160.4163
Morocco Africa 1967 50.335 14770296 1711.0448
Austria Europe 1982 73.180 7574613 21597.0836
Burkina Faso Africa 1992 50.260 8878303 931.7528
United Kingdom Europe 1992 76.420 57866349 22705.0925
Angola Africa 2002 41.003 10866106 2773.2873
Morocco Africa 1987 62.677 22987397 2755.0470
Taiwan Asia 1952 58.500 8550362 1206.9479
El Salvador Americas 1967 55.855 3232927 4358.5954
El Salvador Americas 1987 63.154 4842194 4140.4421
Morocco Africa 1952 42.873 9939217 1688.2036
Sri Lanka Asia 2002 70.815 19576783 3015.3788
Hong Kong, China Asia 1982 75.450 5264500 14560.5305
Slovenia Europe 1977 70.970 1746919 15277.0302
Austria Europe 2007 79.829 8199783 36126.4927
Somalia Africa 1957 34.977 2780415 1258.1474
Brazil Americas 1972 59.504 100840058 4985.7115
Central African Republic Africa 1997 46.066 3696513 740.5063
Swaziland Africa 1967 46.633 420690 2613.1017
Afghanistan Asia 1972 36.088 13079460 739.9811
Indonesia Asia 1997 66.041 199278000 3119.3356
Trinidad and Tobago Americas 2002 68.976 1101832 11460.6002
Bahrain Asia 2007 75.635 708573 29796.0483
Romania Europe 1967 66.800 19284814 6470.8665
Slovak Republic Europe 1962 70.330 4237384 7481.1076
Trinidad and Tobago Americas 1967 65.400 960155 5621.3685
Panama Americas 2002 74.712 2990875 7356.0319
Jamaica Americas 1957 62.610 1535090 4756.5258
Guinea Africa 2007 56.007 9947814 942.6542
Hong Kong, China Asia 1967 70.000 3722800 6197.9628
Lebanon Asia 1997 70.265 3430388 8754.9639
Benin Africa 1982 50.904 3641603 1277.8976
Libya Africa 1972 52.773 2183877 21011.4972
Malaysia Asia 1962 55.737 8906385 2036.8849
Hong Kong, China Asia 1977 73.600 4583700 11186.1413
Lebanon Asia 1992 69.292 3219994 6890.8069
Switzerland Europe 1992 78.030 6995447 31871.5303
Guatemala Americas 1957 44.142 3640876 2617.1560
Niger Africa 1962 39.487 4076008 997.7661
Uganda Africa 2002 47.813 24739869 927.7210
Jamaica Americas 1952 58.530 1426095 2898.5309
Libya Africa 1962 47.808 1441863 6757.0308
Swaziland Africa 1987 57.678 779348 3984.8398
Ecuador Americas 1952 48.357 3548753 3522.1107
Central African Republic Africa 1977 46.775 2167533 1109.3743
Hong Kong, China Asia 1957 64.750 2736300 3629.0765
Israel Asia 2002 79.696 6029529 21905.5951
Saudi Arabia Asia 1967 49.901 5618198 16903.0489
Mongolia Asia 2007 66.803 2874127 3095.7723
Cameroon Africa 1957 40.428 5359923 1313.0481
Togo Africa 1982 55.471 2644765 1344.5780
Germany Europe 1957 69.100 71019069 10187.8267
Slovak Republic Europe 1992 71.380 5302888 9498.4677
Guinea Africa 1977 40.762 4227026 874.6859
Nigeria Africa 1997 47.464 106207839 1624.9413
Poland Europe 1952 61.310 25730551 4029.3297
Niger Africa 1997 51.313 9666252 580.3052
Ethiopia Africa 2002 50.725 67946797 530.0535
Kuwait Asia 1977 69.343 1140357 59265.4771
Burkina Faso Africa 1997 50.324 10352843 946.2950
Kuwait Asia 1992 75.190 1418095 34932.9196
Comoros Africa 2007 65.152 710960 986.1479
Israel Asia 1972 71.630 3095893 12786.9322
Venezuela Americas 1962 60.770 8143375 8422.9742
Honduras Americas 2007 70.198 7483763 3548.3308
Jordan Asia 1992 68.015 3867409 3431.5936
Tanzania Africa 1992 50.440 26605473 825.6825
Panama Americas 1962 61.817 1215725 3536.5403
Gambia Africa 1987 49.265 848406 611.6589
Croatia Europe 2002 74.876 4481020 11628.3890
Somalia Africa 1992 39.658 6099799 926.9603
Nigeria Africa 2002 46.608 119901274 1615.2864
Canada Americas 1952 68.750 14785584 11367.1611
China Asia 1982 65.525 1000281000 962.4214
Spain Europe 1957 66.660 29841614 4564.8024
Haiti Americas 1992 55.089 6326682 1456.3095
Swaziland Africa 1992 58.474 962344 3553.0224
Djibouti Africa 1972 44.366 178848 3694.2124
Nigeria Africa 2007 46.859 135031164 2013.9773
Swaziland Africa 2007 39.613 1133066 4513.4806
Namibia Africa 2007 52.906 2055080 4811.0604
Slovenia Europe 1997 75.130 2011612 17161.1073
Montenegro Europe 1972 70.636 527678 7778.4140
Uruguay Americas 1992 72.752 3149262 8137.0048
Dominican Republic Americas 1967 56.751 4049146 1653.7230
Equatorial Guinea Africa 1997 48.245 439971 2814.4808
Turkey Europe 1977 59.507 42404033 4269.1223
Bolivia Americas 1967 45.032 4040665 2586.8861
Mexico Americas 1967 60.110 47995559 5754.7339
Cambodia Asia 1962 43.415 6083619 496.9136
Montenegro Europe 2002 73.981 720230 6557.1943
Swaziland Africa 1997 54.289 1054486 3876.7685
Bahrain Asia 1987 70.750 454612 18524.0241
Hong Kong, China Asia 1952 60.960 2125900 3054.4212
Syria Asia 1972 57.296 6701172 2571.4230
Congo, Rep. Africa 1997 52.962 2800947 3484.1644
Denmark Europe 1987 74.800 5127024 25116.1758
Saudi Arabia Asia 1992 68.768 16945857 24841.6178
Cote d’Ivoire Africa 1982 53.983 9025951 2602.7102
Lesotho Africa 1992 59.685 1803195 977.4863
Mauritius Africa 1982 66.711 992040 3688.0377
Yemen, Rep. Asia 1977 44.175 8403990 1829.7652
Sierra Leone Africa 1957 31.570 2295678 1004.4844
Burundi Africa 1997 45.326 6121610 463.1151
Uruguay Americas 2007 76.384 3447496 10611.4630
Singapore Asia 1972 69.521 2152400 8597.7562
Madagascar Africa 1997 54.978 14165114 986.2959
South Africa Africa 1962 49.951 18356657 5768.7297
Panama Americas 1972 66.216 1616384 5364.2497
Portugal Europe 1962 64.390 9019800 4727.9549
Colombia Americas 2007 72.889 44227550 7006.5804
Canada Americas 1992 77.950 28523502 26342.8843
Iceland Europe 1952 72.490 147962 7267.6884
Spain Europe 1972 73.060 34513161 10638.7513
Sierra Leone Africa 1952 30.331 2143249 879.7877
Pakistan Asia 1967 49.800 60641899 942.4083
Italy Europe 1987 76.420 56729703 19207.2348
Ethiopia Africa 1972 43.515 30770372 566.2439
Ghana Africa 1997 58.556 18418288 1005.2458
Puerto Rico Americas 1977 73.440 3080828 9770.5249
United Kingdom Europe 1957 70.420 51430000 11283.1779
Costa Rica Americas 1967 65.424 1588717 4161.7278
United States Americas 1982 74.650 232187835 25009.5591
Sweden Europe 1967 74.160 7867931 15258.2970
Cambodia Asia 1997 56.534 11782962 734.2852
Burundi Africa 1977 45.910 3834415 556.1033
United Kingdom Europe 1997 77.218 58808266 26074.5314
Dominican Republic Americas 1977 61.788 5302800 2681.9889
United Kingdom Europe 1952 69.180 50430000 9979.5085
Syria Asia 2002 73.053 17155814 4090.9253

Let’s check how many levels there are and whether we need to remove unused factor levels.

nlevels(No_Oceania$continent)
## [1] 5
levels(No_Oceania$continent)
## [1] "Africa"   "Americas" "Asia"     "Europe"   "Oceania"

We see that there are still 5 levels. Let’s remove the unused factor levels.

# Discard the factor levels that no longer occur in the filtered data.
Drop_Oceania <- droplevels(No_Oceania)
nlevels(Drop_Oceania$continent)
## [1] 4
levels(Drop_Oceania$continent)
## [1] "Africa"   "Americas" "Asia"     "Europe"

We do note that the factor levels are ordered alphabetically. Let’s use forcats to re-order the factor levels. We can re-order in different ways.

One way to re-order is by frequency, for example the frequency of each continent in the data.

# Continent levels ordered by how often each continent occurs in the data.
levels(fct_infreq(Drop_Oceania$continent))
## [1] "Africa"   "Asia"     "Europe"   "Americas"

We can also re-order by the value of other variables in the data, such as life expectancy or GDP per capita.

# Observations from 1980 onwards, each annotated with the median GDP per
# capita of its continent/year group. The result stays grouped by continent
# and year.
Drop_Oceania_1980 <- Drop_Oceania %>%
  filter(year >= 1980) %>%
  group_by(continent, year) %>%
  mutate(mediangdp = median(gdpPercap))

Drop_Oceania_1980
## # A tibble: 840 x 7
## # Groups:   continent, year [24]
##    country     continent  year lifeExp      pop gdpPercap mediangdp
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>     <dbl>
##  1 Afghanistan Asia       1982    39.9 12881816      978.     4107.
##  2 Afghanistan Asia       1987    40.8 13867957      852.     4106.
##  3 Afghanistan Asia       1992    41.7 16317921      649.     3726.
##  4 Afghanistan Asia       1997    41.8 22227415      635.     3645.
##  5 Afghanistan Asia       2002    42.1 25268405      727.     4091.
##  6 Afghanistan Asia       2007    43.8 31889923      975.     4471.
##  7 Albania     Europe     1982    70.4  2780097     3631.    15323.
##  8 Albania     Europe     1987    72    3075321     3739.    16215.
##  9 Albania     Europe     1992    71.6  3326498     2497.    17550.
## 10 Albania     Europe     1997    73.0  3428038     3193.    19596.
## # ... with 830 more rows
# Continent levels ordered by each continent's minimum GDP per capita.
Drop_Oceania_1980$continent %>%
  fct_reorder(Drop_Oceania_1980$gdpPercap, .fun = min) %>%
  levels() %>%
  head()
## [1] "Africa"   "Asia"     "Americas" "Europe"

Now we can put this into graph form.

# Median GDP per capita over time, one line per continent.
# Columns are referenced by bare name inside aes(): writing `data$column`
# there bypasses ggplot2's data masking and is discouraged.
# The colour factor is re-ordered with fct_reorder2() so that the legend
# entries follow the same order as the lines.
ggplot(
  Drop_Oceania_1980,
  aes(
    x = year,
    y = mediangdp,
    color = fct_reorder2(continent, year, mediangdp)
  )
) +
  geom_line() +
  labs(color = "Continent") +
  xlab("Year") +
  ylab("Median GDP") +
  ggtitle("Median GDP 1980-2007 by Continent")

We see that reordering the levels allows the legend to be organized in the same fashion as the trendlines.

Part 2 File input and output

Let’s try saving some wrangled data into a new file.

# Keep only the 2007 observations. `year` is an integer column, so it is
# compared against a number rather than the string "2007", which only worked
# through implicit type coercion.
trial <- gapminder %>%
  filter(year == 2007)

# Save the wrangled data as comma-separated text.
# NOTE(review): write.csv() also writes the row names by default; they come
# back as an extra `X` column when the file is re-read. Pass
# `row.names = FALSE` if that column is not wanted.
write.csv(trial, file = "assignment5_stat545")

Now let’s import this file to see if we can read it.

read.csv("assignment5_stat545")
##       X                  country continent year lifeExp        pop
## 1     1              Afghanistan      Asia 2007  43.828   31889923
## 2     2                  Albania    Europe 2007  76.423    3600523
## 3     3                  Algeria    Africa 2007  72.301   33333216
## 4     4                   Angola    Africa 2007  42.731   12420476
## 5     5                Argentina  Americas 2007  75.320   40301927
## 6     6                Australia   Oceania 2007  81.235   20434176
## 7     7                  Austria    Europe 2007  79.829    8199783
## 8     8                  Bahrain      Asia 2007  75.635     708573
## 9     9               Bangladesh      Asia 2007  64.062  150448339
## 10   10                  Belgium    Europe 2007  79.441   10392226
## 11   11                    Benin    Africa 2007  56.728    8078314
## 12   12                  Bolivia  Americas 2007  65.554    9119152
## 13   13   Bosnia and Herzegovina    Europe 2007  74.852    4552198
## 14   14                 Botswana    Africa 2007  50.728    1639131
## 15   15                   Brazil  Americas 2007  72.390  190010647
## 16   16                 Bulgaria    Europe 2007  73.005    7322858
## 17   17             Burkina Faso    Africa 2007  52.295   14326203
## 18   18                  Burundi    Africa 2007  49.580    8390505
## 19   19                 Cambodia      Asia 2007  59.723   14131858
## 20   20                 Cameroon    Africa 2007  50.430   17696293
## 21   21                   Canada  Americas 2007  80.653   33390141
## 22   22 Central African Republic    Africa 2007  44.741    4369038
## 23   23                     Chad    Africa 2007  50.651   10238807
## 24   24                    Chile  Americas 2007  78.553   16284741
## 25   25                    China      Asia 2007  72.961 1318683096
## 26   26                 Colombia  Americas 2007  72.889   44227550
## 27   27                  Comoros    Africa 2007  65.152     710960
## 28   28         Congo, Dem. Rep.    Africa 2007  46.462   64606759
## 29   29              Congo, Rep.    Africa 2007  55.322    3800610
## 30   30               Costa Rica  Americas 2007  78.782    4133884
## 31   31            Cote d'Ivoire    Africa 2007  48.328   18013409
## 32   32                  Croatia    Europe 2007  75.748    4493312
## 33   33                     Cuba  Americas 2007  78.273   11416987
## 34   34           Czech Republic    Europe 2007  76.486   10228744
## 35   35                  Denmark    Europe 2007  78.332    5468120
## 36   36                 Djibouti    Africa 2007  54.791     496374
## 37   37       Dominican Republic  Americas 2007  72.235    9319622
## 38   38                  Ecuador  Americas 2007  74.994   13755680
## 39   39                    Egypt    Africa 2007  71.338   80264543
## 40   40              El Salvador  Americas 2007  71.878    6939688
## 41   41        Equatorial Guinea    Africa 2007  51.579     551201
## 42   42                  Eritrea    Africa 2007  58.040    4906585
## 43   43                 Ethiopia    Africa 2007  52.947   76511887
## 44   44                  Finland    Europe 2007  79.313    5238460
## 45   45                   France    Europe 2007  80.657   61083916
## 46   46                    Gabon    Africa 2007  56.735    1454867
## 47   47                   Gambia    Africa 2007  59.448    1688359
## 48   48                  Germany    Europe 2007  79.406   82400996
## 49   49                    Ghana    Africa 2007  60.022   22873338
## 50   50                   Greece    Europe 2007  79.483   10706290
## 51   51                Guatemala  Americas 2007  70.259   12572928
## 52   52                   Guinea    Africa 2007  56.007    9947814
## 53   53            Guinea-Bissau    Africa 2007  46.388    1472041
## 54   54                    Haiti  Americas 2007  60.916    8502814
## 55   55                 Honduras  Americas 2007  70.198    7483763
## 56   56         Hong Kong, China      Asia 2007  82.208    6980412
## 57   57                  Hungary    Europe 2007  73.338    9956108
## 58   58                  Iceland    Europe 2007  81.757     301931
## 59   59                    India      Asia 2007  64.698 1110396331
## 60   60                Indonesia      Asia 2007  70.650  223547000
## 61   61                     Iran      Asia 2007  70.964   69453570
## 62   62                     Iraq      Asia 2007  59.545   27499638
## 63   63                  Ireland    Europe 2007  78.885    4109086
## 64   64                   Israel      Asia 2007  80.745    6426679
## 65   65                    Italy    Europe 2007  80.546   58147733
## 66   66                  Jamaica  Americas 2007  72.567    2780132
## 67   67                    Japan      Asia 2007  82.603  127467972
## 68   68                   Jordan      Asia 2007  72.535    6053193
## 69   69                    Kenya    Africa 2007  54.110   35610177
## 70   70         Korea, Dem. Rep.      Asia 2007  67.297   23301725
## 71   71              Korea, Rep.      Asia 2007  78.623   49044790
## 72   72                   Kuwait      Asia 2007  77.588    2505559
## 73   73                  Lebanon      Asia 2007  71.993    3921278
## 74   74                  Lesotho    Africa 2007  42.592    2012649
## 75   75                  Liberia    Africa 2007  45.678    3193942
## 76   76                    Libya    Africa 2007  73.952    6036914
## 77   77               Madagascar    Africa 2007  59.443   19167654
## 78   78                   Malawi    Africa 2007  48.303   13327079
## 79   79                 Malaysia      Asia 2007  74.241   24821286
## 80   80                     Mali    Africa 2007  54.467   12031795
## 81   81               Mauritania    Africa 2007  64.164    3270065
## 82   82                Mauritius    Africa 2007  72.801    1250882
## 83   83                   Mexico  Americas 2007  76.195  108700891
## 84   84                 Mongolia      Asia 2007  66.803    2874127
## 85   85               Montenegro    Europe 2007  74.543     684736
## 86   86                  Morocco    Africa 2007  71.164   33757175
## 87   87               Mozambique    Africa 2007  42.082   19951656
## 88   88                  Myanmar      Asia 2007  62.069   47761980
## 89   89                  Namibia    Africa 2007  52.906    2055080
## 90   90                    Nepal      Asia 2007  63.785   28901790
## 91   91              Netherlands    Europe 2007  79.762   16570613
## 92   92              New Zealand   Oceania 2007  80.204    4115771
## 93   93                Nicaragua  Americas 2007  72.899    5675356
## 94   94                    Niger    Africa 2007  56.867   12894865
## 95   95                  Nigeria    Africa 2007  46.859  135031164
## 96   96                   Norway    Europe 2007  80.196    4627926
## 97   97                     Oman      Asia 2007  75.640    3204897
## 98   98                 Pakistan      Asia 2007  65.483  169270617
## 99   99                   Panama  Americas 2007  75.537    3242173
## 100 100                 Paraguay  Americas 2007  71.752    6667147
## 101 101                     Peru  Americas 2007  71.421   28674757
## 102 102              Philippines      Asia 2007  71.688   91077287
## 103 103                   Poland    Europe 2007  75.563   38518241
## 104 104                 Portugal    Europe 2007  78.098   10642836
## 105 105              Puerto Rico  Americas 2007  78.746    3942491
## 106 106                  Reunion    Africa 2007  76.442     798094
## 107 107                  Romania    Europe 2007  72.476   22276056
## 108 108                   Rwanda    Africa 2007  46.242    8860588
## 109 109    Sao Tome and Principe    Africa 2007  65.528     199579
## 110 110             Saudi Arabia      Asia 2007  72.777   27601038
## 111 111                  Senegal    Africa 2007  63.062   12267493
## 112 112                   Serbia    Europe 2007  74.002   10150265
## 113 113             Sierra Leone    Africa 2007  42.568    6144562
## 114 114                Singapore      Asia 2007  79.972    4553009
## 115 115          Slovak Republic    Europe 2007  74.663    5447502
## 116 116                 Slovenia    Europe 2007  77.926    2009245
## 117 117                  Somalia    Africa 2007  48.159    9118773
## 118 118             South Africa    Africa 2007  49.339   43997828
## 119 119                    Spain    Europe 2007  80.941   40448191
## 120 120                Sri Lanka      Asia 2007  72.396   20378239
## 121 121                    Sudan    Africa 2007  58.556   42292929
## 122 122                Swaziland    Africa 2007  39.613    1133066
## 123 123                   Sweden    Europe 2007  80.884    9031088
## 124 124              Switzerland    Europe 2007  81.701    7554661
## 125 125                    Syria      Asia 2007  74.143   19314747
## 126 126                   Taiwan      Asia 2007  78.400   23174294
## 127 127                 Tanzania    Africa 2007  52.517   38139640
## 128 128                 Thailand      Asia 2007  70.616   65068149
## 129 129                     Togo    Africa 2007  58.420    5701579
## 130 130      Trinidad and Tobago  Americas 2007  69.819    1056608
## 131 131                  Tunisia    Africa 2007  73.923   10276158
## 132 132                   Turkey    Europe 2007  71.777   71158647
## 133 133                   Uganda    Africa 2007  51.542   29170398
## 134 134           United Kingdom    Europe 2007  79.425   60776238
## 135 135            United States  Americas 2007  78.242  301139947
## 136 136                  Uruguay  Americas 2007  76.384    3447496
## 137 137                Venezuela  Americas 2007  73.747   26084662
## 138 138                  Vietnam      Asia 2007  74.249   85262356
## 139 139       West Bank and Gaza      Asia 2007  73.422    4018332
## 140 140              Yemen, Rep.      Asia 2007  62.698   22211743
## 141 141                   Zambia    Africa 2007  42.384   11746035
## 142 142                 Zimbabwe    Africa 2007  43.487   12311143
##      gdpPercap
## 1     974.5803
## 2    5937.0295
## 3    6223.3675
## 4    4797.2313
## 5   12779.3796
## 6   34435.3674
## 7   36126.4927
## 8   29796.0483
## 9    1391.2538
## 10  33692.6051
## 11   1441.2849
## 12   3822.1371
## 13   7446.2988
## 14  12569.8518
## 15   9065.8008
## 16  10680.7928
## 17   1217.0330
## 18    430.0707
## 19   1713.7787
## 20   2042.0952
## 21  36319.2350
## 22    706.0165
## 23   1704.0637
## 24  13171.6388
## 25   4959.1149
## 26   7006.5804
## 27    986.1479
## 28    277.5519
## 29   3632.5578
## 30   9645.0614
## 31   1544.7501
## 32  14619.2227
## 33   8948.1029
## 34  22833.3085
## 35  35278.4187
## 36   2082.4816
## 37   6025.3748
## 38   6873.2623
## 39   5581.1810
## 40   5728.3535
## 41  12154.0897
## 42    641.3695
## 43    690.8056
## 44  33207.0844
## 45  30470.0167
## 46  13206.4845
## 47    752.7497
## 48  32170.3744
## 49   1327.6089
## 50  27538.4119
## 51   5186.0500
## 52    942.6542
## 53    579.2317
## 54   1201.6372
## 55   3548.3308
## 56  39724.9787
## 57  18008.9444
## 58  36180.7892
## 59   2452.2104
## 60   3540.6516
## 61  11605.7145
## 62   4471.0619
## 63  40675.9964
## 64  25523.2771
## 65  28569.7197
## 66   7320.8803
## 67  31656.0681
## 68   4519.4612
## 69   1463.2493
## 70   1593.0655
## 71  23348.1397
## 72  47306.9898
## 73  10461.0587
## 74   1569.3314
## 75    414.5073
## 76  12057.4993
## 77   1044.7701
## 78    759.3499
## 79  12451.6558
## 80   1042.5816
## 81   1803.1515
## 82  10956.9911
## 83  11977.5750
## 84   3095.7723
## 85   9253.8961
## 86   3820.1752
## 87    823.6856
## 88    944.0000
## 89   4811.0604
## 90   1091.3598
## 91  36797.9333
## 92  25185.0091
## 93   2749.3210
## 94    619.6769
## 95   2013.9773
## 96  49357.1902
## 97  22316.1929
## 98   2605.9476
## 99   9809.1856
## 100  4172.8385
## 101  7408.9056
## 102  3190.4810
## 103 15389.9247
## 104 20509.6478
## 105 19328.7090
## 106  7670.1226
## 107 10808.4756
## 108   863.0885
## 109  1598.4351
## 110 21654.8319
## 111  1712.4721
## 112  9786.5347
## 113   862.5408
## 114 47143.1796
## 115 18678.3144
## 116 25768.2576
## 117   926.1411
## 118  9269.6578
## 119 28821.0637
## 120  3970.0954
## 121  2602.3950
## 122  4513.4806
## 123 33859.7484
## 124 37506.4191
## 125  4184.5481
## 126 28718.2768
## 127  1107.4822
## 128  7458.3963
## 129   882.9699
## 130 18008.5092
## 131  7092.9230
## 132  8458.2764
## 133  1056.3801
## 134 33203.2613
## 135 42951.6531
## 136 10611.4630
## 137 11415.8057
## 138  2441.5764
## 139  3025.3498
## 140  2280.7699
## 141  1271.2116
## 142   469.7093

Part 3 Visualization design

Here is an old graph from assignment 3. It looks at the weighted mean life expectancy for continents over time for the gapminder data set.

# Population-weighted mean life expectancy per continent and year, drawn as
# points coloured by continent.
gapminder %>%
  group_by(continent, year) %>%
  summarise(mean_lifeExp_weighted = weighted.mean(lifeExp, w = pop)) %>%
  ggplot(aes(x = year, y = mean_lifeExp_weighted)) +
  geom_point(aes(colour = continent))

What can be improved? 1) We can add a trend line so we can estimate what is happening between years. 2) We can change the axis titles and add a graph title. 3) We can re-order the factor levels so the legend is presented in a logical order. 4) We can change the scale of the y-axis to show a break for every 5 years of life expectancy. 5) We can change the graph’s theme.

# Improved version of the weighted mean life expectancy plot: points joined
# by lines, readable axis titles, y-axis breaks every 5 years, a re-ordered
# legend and a minimal theme.
#
# The colour mapping is declared once, at plot level, using the re-ordered
# continent factor. The layers must not map colour again: a layer-level
# `aes(colour = continent)` overrides the plot-level mapping and would put
# the legend back into alphabetical order.
gapminder %>%
  group_by(continent, year) %>%
  summarise(mean_lifeExp_weighted = weighted.mean(lifeExp, pop)) %>%
  ungroup() %>%
  ggplot(aes(
    x = year,
    y = mean_lifeExp_weighted,
    colour = fct_reorder2(continent, year, mean_lifeExp_weighted)
  )) +
  geom_point() +
  geom_line() +
  scale_y_continuous(breaks = seq(5, 85, by = 5)) +
  xlab("Year") +
  ylab("Weighted Mean Life Expectancy") +
  labs(colour = "Continent") +
  ggtitle("Weighted Mean Life Expectancy vs. Time (Years)") +
  theme_minimal()

**Let’s try a different graph. We can also make it interactive using plotly.**

Here is the original graph:

# Life expectancy of every country over time, one panel per continent.
# Points are coloured by whether the value lies below that year's median
# across all countries.
gapminder %>%
  group_by(year) %>%
  mutate(median = median(lifeExp)) %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_jitter(aes(colour = (lifeExp < median)), alpha = 0.5) +
  facet_wrap(~continent)

Here is the cleaner version.

# Cleaner version of the life expectancy plot: one continent per row,
# y-axis breaks every 10 years, descriptive titles and a labelled legend.
gapminder %>%
  group_by(year) %>%
  mutate(median = median(lifeExp)) %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_jitter(aes(colour = (lifeExp < median)), alpha = 0.5) +
  facet_wrap(~continent, nrow = 5, ncol = 1) +
  scale_y_continuous(breaks = seq(10, 90, by = 10)) + # change the scale
  xlab("Year") + # change the titles
  ylab("Life Expectancy") +
  ggtitle("Country life expectancy over time") +
  # Make the legend clearer. The title has to state the same comparison as
  # the colour mapping above (`lifeExp < median`); otherwise TRUE and FALSE
  # read the wrong way round.
  scale_colour_discrete(name = "Country life expectancy < worldwide median?") +
  theme(legend.position = "right") # select legend position

Now, let’s make this into a plotly graph.

# First, save the cleaned-up graph as "plotly1" so that it can be converted
# to an interactive plot and written to disk later.
plotly1 <- gapminder %>%
  group_by(year) %>%
  mutate(median = median(lifeExp)) %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_jitter(aes(colour = (lifeExp < median)), alpha = 0.5) +
  facet_wrap(~continent, nrow = 5, ncol = 1) +
  scale_y_continuous(breaks = seq(10, 90, by = 10)) +
  xlab("Year") +
  ylab("Life Expectancy") +
  ggtitle("Country life expectancy over time") +
  # The legend title has to state the same comparison as the colour mapping
  # above (`lifeExp < median`); otherwise TRUE and FALSE read the wrong way
  # round.
  scale_colour_discrete(name = "Country life expectancy < worldwide median?") +
  theme(legend.position = "right")

#Now open plotly package.
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Generate the plotly graph from the saved ggplot object.
ggplotly(plotly1)

We do notice that plotly has some nice features. However, the visualization is not as good: the legend is cut off. We can see if moving the legend elsewhere will improve this visualization.

# Same graph as plotly1, but with the legend moved below the panels to test
# whether that stops it being cut off in the plotly rendering.
plotly2 <- gapminder %>%
  group_by(year) %>%
  mutate(median = median(lifeExp)) %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_jitter(aes(colour = (lifeExp < median)), alpha = 0.5) +
  facet_wrap(~continent, nrow = 5, ncol = 1) +
  scale_y_continuous(breaks = seq(10, 90, by = 10)) +
  xlab("Year") +
  ylab("Life Expectancy") +
  ggtitle("Country life expectancy over time") +
  # The legend title has to state the same comparison as the colour mapping
  # above (`lifeExp < median`); otherwise TRUE and FALSE read the wrong way
  # round.
  scale_colour_discrete(name = "Country life expectancy < worldwide median?") +
  theme(legend.position = "bottom")

# Generate the plotly graph from the saved ggplot object.
ggplotly(plotly2)

Hmmmm. It appears plotly does not have all the aesthetic options that ggplot does. Anyway, let’s save this as a local html file.

# Convert the ggplot to a plotly widget and save it as an HTML file.
# The file name carries the .html extension so that the saved widget is
# recognised as a web page and opens in a browser.
ggplotly(plotly1) %>%
  htmlwidgets::saveWidget(file = "plotly1.html")

Let’s save this plot using ggsave(). This will generate an image file where we can specify certain aspects such as dimension and resolution.

# Display the static ggplot version of the graph.
plotly1

# Write the graph to a 20 cm x 20 cm PNG at 300 dpi. The plot is passed
# explicitly so the saved image does not depend on which plot happened to be
# displayed last.
ggsave(
  "plotly1.png",
  plot = plotly1,
  width = 20,
  height = 20,
  units = "cm",
  dpi = 300
)